"""
爬取豆瓣前十名的电影的短评数量
"""

import requests
from bs4 import BeautifulSoup
import time

# Headers sent with the HTTP requests made in this script: a desktop-browser
# User-Agent string replaces the default one that `requests` would send
# (presumably so the site serves its regular pages -- confirm if changing it).
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36"
    ),
}


def _fetch_soup(session, url, timeout):
    """Fetch *url* through *session* and return the parsed HTML document.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx response, so an error page is never parsed as content.
    """
    response = session.get(url, timeout=timeout)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _extract_comment_count(detail_soup):
    """Return the count token of the short-review link, or None if not found."""
    link = detail_soup.select_one('.mod-hd .pl a')
    if link is None:
        return None
    # The count is the second whitespace-separated token of the link text
    # (presumably shaped like "<label> <number> <unit>" -- confirm against
    # the live page). split() without arguments tolerates repeated or
    # leading whitespace, unlike split(' ').
    tokens = link.get_text().split()
    return tokens[1] if len(tokens) > 1 else None


def get_top10_comments(*, timeout=10, delay=2):
    """Print the short-review count of the first 10 movies on Douban's Top 250.

    Fetches the Top 250 list page, follows the first ten movie links found
    on it, and prints one line per movie with its title and the count read
    from the short-review link on the detail page. When the title element
    is missing the detail URL is printed instead; when the count cannot be
    read a placeholder is printed instead of raising.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
        delay: Seconds to pause between consecutive detail-page requests.

    Raises:
        requests.RequestException: if any page cannot be fetched (network
            error, timeout, or an HTTP error status).
    """
    # One session for all requests so the connection to the host is reused.
    with requests.Session() as session:
        session.headers.update(headers)

        # Fetch the Top 250 list page and keep the first 10 movie links.
        list_soup = _fetch_soup(
            session, "https://movie.douban.com/top250", timeout
        )
        movies = list_soup.select('.item .hd a')[:10]

        for index, movie in enumerate(movies):
            # Polite delay to avoid being blocked; only needed *between*
            # detail requests, not after the last one.
            if index:
                time.sleep(delay)

            detail_url = movie.get('href')
            if not detail_url:
                # An anchor without a target cannot be followed.
                continue

            # Fetch the movie's detail page and pull out title and count.
            detail_soup = _fetch_soup(session, detail_url, timeout)

            title_node = detail_soup.select_one('h1 span')
            title = title_node.get_text() if title_node is not None else detail_url
            comment_count = _extract_comment_count(detail_soup) or "未知"

            print(f"电影: {title} | 短评数量: {comment_count}")


# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger network requests.
if __name__ == "__main__":
    get_top10_comments()
